#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long;
use FindBin qw($RealBin);
use lib "$FindBin::RealBin/../source";
use CF::Constants;
use CF::Helpers;

##########################################################################
# Copyright 2014, Philip Ewels (phil.ewels@scilifelab.se)                #
#                                                                        #
# This file is part of Cluster Flow.                                     #
#                                                                        #
# Cluster Flow is free software: you can redistribute it and/or modify   #
# it under the terms of the GNU General Public License as published by   #
# the Free Software Foundation, either version 3 of the License, or      #
# (at your option) any later version.                                    #
#                                                                        #
# Cluster Flow is distributed in the hope that it will be useful,        #
# but WITHOUT ANY WARRANTY; without even the implied warranty of         #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
# GNU General Public License for more details.                           #
#                                                                        #
# You should have received a copy of the GNU General Public License      #
# along with Cluster Flow.  If not, see <http://www.gnu.org/licenses/>.  #
##########################################################################

# Resource requirements for this module. The hash is handed, by reference,
# to CF::Helpers::module_start() below.
my %requirements = (
    cores   => '1',
    memory  => '2G',
    modules => 'bismark',
    time    => '20',
);

# Help text: a title framed by two 22-character dashed rules, followed by a
# short description of what the module does.
my $help_rule = '-' x 22;
my $helptext = <<"END_HELPTEXT";
$help_rule
 Bismark Pipeline Summary Module
$help_rule

The summary_bismark_pipeline module takes all run files from a Cluster
Flow pipeline and looks for log files generated by bismark processing.
It reads these, looking for certain values, and generates a .csv and .html
summary report of the numbers for all samples processed in the pipeline.

END_HELPTEXT

# Module start-up. The returned hash is kept in %cf; the rest of this script
# reads its 'run_fn', 'pipeline_id', 'files' and 'job_id' entries.
my %cf = CF::Helpers::module_start(\%requirements, $helptext);

# MODULE
# Remember when we started so the run time can be reported at the end.
my $timestart = time();

# Open the run file for appending; the report file name is written to it
# once bismark2summary has finished.
unless (open(RUN, '>>', $cf{'run_fn'})) {
    die "###CF Error: Can't write to $cf{run_fn}: $!";
}

# Print version information about the module.
# The STDOUT of `bismark2summary --version` is captured and echoed to STDERR
# between two banner lines. If a version number can be parsed from it, a
# tab-separated ###CFVERS line is written as well.
my $version = `bismark2summary --version`;

# Backticks give undef or an empty string when the command could not be run
# or printed nothing. Passing either to warn() would make Perl print its
# generic "Warning: something's wrong" message, and matching undef against
# the regex below would raise an uninitialised-value warning.
$version = '' unless defined $version;

warn "---------- bismark2summary version information ----------\n";
if(length $version){
  warn $version;
} else {
  warn "(no version output captured from bismark2summary, exit status $?)\n";
}
warn "\n------- End of bismark2summary version information ------\n";
if($version =~ /bismark2summary version: v([\d\.]+)/){
  warn "###CFVERS bismark2summary\t$1\n\n";
}

# Find the original bismark aligned BAM files
# $cf{files} is treated as a hash of job ID => array ref of file names. Every
# job whose ID looks like cf_<pipeline_id>_bismark_align_<1-3 digits>
# contributes all of its files to @bam_files.
my $pipeline_id = $cf{'pipeline_id'};
my @bam_files;

# Fall back to an empty table if no files entry is present, so that we reach
# the explicit "no BAM files" error below instead of a dereference failure.
my $files_for_job = $cf{'files'} || {};

# Collect the matching job IDs together with their trailing job number.
# \Q..\E makes the pipeline ID match literally, even if it contains
# characters that are special in a regex (e.g. '.' or '+').
my %align_job_num;
foreach my $job_id (keys %{$files_for_job}){
    if($job_id =~ m/^cf_\Q${pipeline_id}\E_bismark_align_(\d{1,3})$/){
        $align_job_num{$job_id} = $1;
    }
}

# Hash iteration order is not predictable, so walk the jobs in ascending job
# number (job ID as tie-breaker) to give the same file order on every run.
foreach my $job_id (sort { $align_job_num{$a} <=> $align_job_num{$b} or $a cmp $b } keys %align_job_num){
    push(@bam_files, @{$files_for_job->{$job_id}});
}

# Nothing to summarise is a fatal error for this module.
my $num_samples = scalar @bam_files;
if($num_samples == 0){
    die("###CFSUMMARY Error: No bismark BAM files found for bismark project summary (searching for cf_${pipeline_id}_bismark_align_###).\n");
} else {
    warn("Found $num_samples bismark BAM files..\n");
}

# Run bismark2summary
# Builds the report for all collected BAM files. On success the report file
# name is appended to the run file, keyed by this job's ID.
my $output_fn = "${pipeline_id}_bismark_summary_report.html";

# The flat command string is only used for the ###CFCMD log line.
my $command = "bismark2summary --title $pipeline_id -o $output_fn ".join(' ', @bam_files);
warn "\n###CFCMD $command\n\n";

# Execute with the list form of system() so that no shell is involved and
# file names containing spaces or shell metacharacters reach bismark2summary
# as single, unmodified arguments.
my @command_args = ('bismark2summary', '--title', $pipeline_id, '-o', $output_fn, @bam_files);

if(system(@command_args) == 0){
    if(-e $output_fn){
        print RUN "$cf{job_id}\t$output_fn\n";
        my $duration = CF::Helpers::parse_seconds(time - $timestart);
        warn "###CF bismark2summary successfully ran, took $duration\n";
    } else {
        # A zero exit status without a report is still a failure, so do not
        # follow this with a success message.
        warn "\n###CF Error! bismark2summary report '$output_fn' not found..\n";
    }
} else {
    # Report on STDERR, like every other message in this module.
    warn "###CF Error! bismark2summary exited with an error state: $? $!\n";
}

# Buffered write errors on the run file only show up at close time.
close(RUN) or warn "###CF Error! Could not close run file $cf{run_fn}: $!\n";
